program define selection_endog_5gr, eclass 
	version 12.1
	syntax varlist(min=1) [if] [in], GEN(name) REFvar(varname) XMIN(real) XMAX(real) TITLEline(string) FILEname(string) IV(varlist) WAGESEEN(varname) DEP(varname) BOOTSTRAP(integer) STUBNEW(string) [HOMEgroup(integer 0) GRID(integer 50) CDFOFF(integer 0) CDFPLOT(integer 0) SUBPREDD(integer 0) SUBPREDVAR(varname) SMOOTHOFF(integer 0) SLEEP(integer 0)]

	*****************************
	* PURPOSE
	*   Two-step selection-corrected prediction and pairwise comparison of the
	*   predicted values across the groups of -refvar-:
	*     1. probit of -wageseen- on -iv- and -varlist-; an inverse Mills ratio
	*        is built from its linear index.
	*     2. OLS of -dep- on -varlist- and the Mills ratio, using only the
	*        observations with -refvar- equal to -homegroup-.
	*     3. The Mills ratio is set to 0 and the fitted value is stored in the
	*        new variable -gen- for every observation with non-missing -refvar-
	*        (optionally restricted to -subpredvar-==1 when subpredd(1)).
	*     4. -ksmirnov- is run on -gen- for every pair of distinct -refvar-
	*        levels; results go to the global matrices ksmirnov_*.
	*     5. Unless cdfoff(1): the empirical CDF of -gen- (and, unless
	*        smoothoff(1), a normal-kernel smoothed CDF) is evaluated per group
	*        on a grid of -grid- points in (xmin, xmax] and saved to
	*        ${output}/output_<filename>.dta.
	*     6. The external program -ks_manual_DiD- is called for every pair of
	*        levels against the base-run variables <stubnew>_*.
	*     7. Selected matrix cells plus sample sizes and R2 are posted as e(b);
	*        e(sample) flags observations with non-missing -gen-.
	*
	* NOTES
	*   - titleline() and cdfplot() are parsed but not used in this program.
	*   - The cells picked for e(b) are hard-coded (rows/columns 1..5 of the
	*     level-by-level matrices); NOTE(review): this presumably expects exactly
	*     five groups sorted as 0,1,2,3,4 -- confirm against callers.
	*   - The matrices ksmirnov_* are left in memory as global matrices.
	*****************************

tempvar tempx tempy tempy_smooth
capture drop `gen'
	
	*****************************
	* INITIALIZE
	*****************************

local N_iterations=50

	*****************************
	* SET UP SAMPLE MARKER
	*****************************

* A 0/1 marker replaces the former string-built -ifext- macro. Pasting
* "& ..." onto the user's -if- broke for conditions containing double quotes,
* was subject to string-expression truncation, and bound only to the last
* term of an "a | b" condition. -mark- evaluates -if-/-in- exactly once.
tempvar touse
mark `touse' `if' `in'

	*****************************
	* FIRST STAGE
	*****************************

tempvar xbhat mills

probit `wageseen' `iv' `varlist' if `touse', iterate(`N_iterations')
local N_probit=e(N)

predict `xbhat' if e(sample)==1, xb
* inverse Mills ratio from the probit index
qui gen `mills'=normalden(`xbhat')/normal(`xbhat')

	*****************************
	* SECOND STAGE
	*****************************
	
* wage regression on the home group only
reg `dep' `varlist' `mills' if `touse' & `refvar'==`homegroup'

local N_wagereg=e(N)
local R2_wagereg=e(r2)

	*****************************
	* PREDICT
	*****************************

* switch the selection term off so the prediction uses covariates only
replace `mills'=0

qui predict `gen' if `touse' & `refvar'!=.

	*************************
	* NOW RESTRICT -gen-, IF SUB_PRED_D==1
	*************************

if `subpredd'==1 {
	tab `refvar' `subpredvar' if `gen'!=., m
	replace `gen'=. if `subpredvar'!=1 & `gen'!=.
}

qui sum `gen'
local N_pred=r(N)

	*****************************
	* TEST
	*****************************

* distinct non-missing levels of -refvar- and their count
qui levelsof `refvar', local(levels) 
local sizeJ : word count `levels'

foreach mm in pval KS pval1s KS1s pval1srev KS1srev {
	matrix define ksmirnov_`mm'=J(`sizeJ',`sizeJ',.)
}

* scratch group indicator; a tempvar so it cannot collide with user data
tempvar pairflag

local count1=1
local count2=1

* cell [row,col] = [position of ll2, position of ll1]; diagonal stays missing
foreach ll1 of local levels {
	foreach ll2 of local levels {	
			
		if `ll1'!=`ll2' {
			qui gen `pairflag'=1 if `refvar'==`ll1'
			qui replace `pairflag'=0 if `refvar'==`ll2'

			qui ksmirnov `gen' if `refvar'==`ll1' | `refvar'==`ll2', by(`pairflag') /* exact */
			local pval=r(p_cor)
			local stat=r(D)
			local stat1s=r(D_1)
			local pval1s=r(p_1)
			local stat1srev=r(D_2)
			local pval1srev=r(p_2)
			matrix ksmirnov_pval[`count2',`count1'] = `pval'
			matrix ksmirnov_KS[`count2',`count1'] = `stat'
			matrix ksmirnov_pval1s[`count2',`count1'] = `pval1s'
			matrix ksmirnov_KS1s[`count2',`count1'] = `stat1s'
			matrix ksmirnov_pval1srev[`count2',`count1'] = `pval1srev'
			matrix ksmirnov_KS1srev[`count2',`count1'] = `stat1srev'
			drop `pairflag'
		}
		
		local count2=`count2'+1

	}
	
	local count1=`count1'+1
	local count2=1

}

if `cdfoff'==0 {

	*****************************
	* COMPUTE CDF
	*****************************

local h_factor=0.6

* evaluation grid stored in the first -grid- observations
qui gen double `tempx'=`xmin' + (_n/`grid')*(`xmax'-`xmin') in 1/`grid'
label var `tempx' "predicted home wage according to covariates"

* scratch variables for the per-gridpoint indicator / kernel values
tempvar cdf_ind cdf_kern

foreach ll1 of local levels {

	qui gen double `tempy'=.
	qui gen double `tempy_smooth'=.
	
		*********************************
		* CHOOSE BANDWIDTH
		*********************************
	
	* h = h_factor * 1.06 * sd * N^(-1/5), computed within the group
	qui sum `gen' if `gen'!=. &  `refvar'==`ll1' 
	local sd=r(sd)
	local N=r(N)
	local h_cdf=`h_factor'*1.06*`sd'*(`N'^(-1/5))
	
		*********************************
		* END BANDWIDTH
		*********************************
	
	forval nn=1/`grid' {
		* empirical CDF at grid point nn: share of the group with gen <= x
		qui gen `cdf_ind'=(`gen'<=`tempx'[`nn']) if `gen'!=. & `tempx'[`nn']!=. &  `refvar'==`ll1'
		qui sum `cdf_ind', meanonly
		qui replace `tempy'=r(mean) if _n==`nn'
		drop `cdf_ind'
	
		if `smoothoff'==0 {
			* smoothed CDF: mean of normal((x - gen)/h) over the group
			qui gen `cdf_kern'=normal((-`gen'+`tempx'[`nn'])/`h_cdf') if `gen'!=. & `tempx'[`nn']!=. &  `refvar'==`ll1'
			qui sum `cdf_kern', meanonly
			qui replace `tempy_smooth'=r(mean) if _n==`nn'
			drop `cdf_kern'
		}
	}
	
	* park this group's results under level-specific temporary names so that
	* `tempy' / `tempy_smooth' can be regenerated for the next level
	tempvar tempy_`ll1'
	rename `tempy' `tempy_`ll1''
	
	tempvar tempy_smooth_`ll1'
	rename `tempy_smooth' `tempy_smooth_`ll1''

}

} /* cdfoff */

	*****************************
	* IMPORT DATA FROM BASE RUN
	*****************************

capture drop `stubnew'_*

if `bootstrap'==1 {
	* NOTE(review): the vectors are fetched as base_group/base_gen/base_sel but
	* are read below as `stubnew'_group/`stubnew'_gen; this only lines up when
	* stubnew() is "base" -- confirm against callers.
	getmata base_group base_gen base_sel, replace force
}
if `bootstrap'==0 {
	qui gen `stubnew'_group=`refvar'
	qui gen `stubnew'_gen=0
	qui gen `stubnew'_sel=`refvar'!=.
}

	*********************************
	* NOW MANUAL KS-TEST
	*********************************

foreach mm in DiDmax DiDabs DiDmaxrev {
	matrix define ksmirnov_`mm'=J(`sizeJ',`sizeJ',.)
}

* declared once and dropped after every pair, so no temporary variables
* accumulate in the dataset across the sizeJ x sizeJ iterations
tempvar ks_group_d ks_sel_d base_group_d base_sel_d

local count1=1
local count2=1

foreach ll1 of local levels {
foreach ll2 of local levels {

	********************
	* START MANUAL KS-TEST
	********************

* group dummy (1 = ll1, 0 = ll2) and selection flag in the current data
qui gen `ks_group_d'=(`refvar'==`ll1') if ((`refvar'==`ll1') | (`refvar'==`ll2'))
qui gen `ks_sel_d'=((`refvar'==`ll1') | (`refvar'==`ll2')) & `gen'!=.

* same two objects built from the base-run variables
qui gen `base_group_d'=(`stubnew'_group==`ll1') if ((`stubnew'_group==`ll1') | (`stubnew'_group==`ll2'))
qui gen `base_sel_d'=((`stubnew'_group==`ll1') | (`stubnew'_group==`ll2')) & `stubnew'_gen!=.

* ks_manual_DiD is defined outside this program; only its r() results are used
ks_manual_DiD, d(`ks_group_d') x(`gen') sel(`ks_sel_d') dbase(`base_group_d') xbase(`stubnew'_gen) selbase(`base_sel_d')
local ks_DiD_abs=r(ks_DiD_abs)
local ks_DiD_0lt1=r(ks_DiD_0lt1)
local ks_DiD_1lt0=r(ks_DiD_1lt0)

matrix ksmirnov_DiDmax[`count2',`count1']=`ks_DiD_0lt1'
matrix ksmirnov_DiDabs[`count2',`count1']=`ks_DiD_abs'
matrix ksmirnov_DiDmaxrev[`count2',`count1']=`ks_DiD_1lt0'

drop `ks_group_d' `ks_sel_d' `base_group_d' `base_sel_d'

	********************
	* END MANUAL KS-TEST
	********************

local count2=`count2'+1

} /* ll2 */
	
local count1=`count1'+1
local count2=1
 
} /* ll1 */

	*****************************
	* EXPORT DATA
	*****************************

if `cdfoff'==0 {

* the data in memory are replaced by the grid and restored afterwards
preserve
	qui gen x_cdf=`tempx'
	local txt3="x_cdf"
	foreach ll1 of local levels {
		qui gen y_cdf_`ll1'=`tempy_`ll1'' 
		qui gen y_cdf_smooth_`ll1'=`tempy_smooth_`ll1''
		local txt3="`txt3' y_cdf_`ll1' y_cdf_smooth_`ll1'"
	}
	keep `txt3'
	qui keep if _n<=`grid'
	di "Note: output saved as output_`filename'. $S_DATE, $S_TIME"
	save "${output}/output_`filename'.dta", replace
restore

} /* cdfoff */

	*****************************
	* EXPORT STUBNEW
	*****************************

capture drop `stubnew'_*

* in the base run, leave the predictions behind as <stubnew>_* variables
if `bootstrap'==0 {	
	qui gen `stubnew'_group=`refvar'
	qui gen `stubnew'_gen = `gen'
	qui gen `stubnew'_sel=`refvar'!=. & `gen'!=.

}

	*****************************
	* PREPARE RETURN VECTOR
	*****************************

* cells are [row,col]; the trailing comments give the level pair assuming the
* sorted levels are 0,1,2,3,4
foreach yy in "KS1s" "KS1srev" "pval1s" "pval1srev" "DiDmax" "DiDmaxrev" {

	local home_unequal_`yy'=ksmirnov_`yy'[2,3]	/* 1 vs 2 */
	local equal_home_`yy'=ksmirnov_`yy'[1,2]	/* 0 vs 1 */
	local equal_unequal_`yy'=ksmirnov_`yy'[1,3]	/* 0 vs 2 */

	local equal_Sequal_`yy'=ksmirnov_`yy'[1,5]	/* 0 vs 4 */ 
	local Sequal_home_`yy'=ksmirnov_`yy'[5,2]	/* 4 vs 1 */ 
	local home_Sunequal_`yy'=ksmirnov_`yy'[2,4]	/* 1 vs 3 */ 
	local Sunequal_unequal_`yy'=ksmirnov_`yy'[4,3]	/* 3 vs 2 */ 

}

	***************
	* PREPARE FOR OUTPUT
	***************

* temporary matrix name so that a user matrix called -b- is not overwritten
tempname bvec

matrix define `bvec'=(`equal_home_KS1s',`home_unequal_KS1s',`equal_unequal_KS1s', `equal_Sequal_KS1s',`Sequal_home_KS1s',`home_Sunequal_KS1s',`Sunequal_unequal_KS1s', ///
`equal_home_KS1srev',`home_unequal_KS1srev',`equal_unequal_KS1srev', ///
`equal_Sequal_KS1srev',`Sequal_home_KS1srev',`home_Sunequal_KS1srev',`Sunequal_unequal_KS1srev', ///
`equal_home_pval1s',`home_unequal_pval1s',`equal_unequal_pval1s', ///
`equal_Sequal_pval1s',`Sequal_home_pval1s',`home_Sunequal_pval1s',`Sunequal_unequal_pval1s', ///
`equal_home_pval1srev',`home_unequal_pval1srev',`equal_unequal_pval1srev', ///
`equal_Sequal_pval1srev',`Sequal_home_pval1srev',`home_Sunequal_pval1srev',`Sunequal_unequal_pval1srev', ///
`home_unequal_DiDmax',`equal_home_DiDmax',`equal_unequal_DiDmax', ///
`equal_Sequal_DiDmax',`Sequal_home_DiDmax',`home_Sunequal_DiDmax',`Sunequal_unequal_DiDmax', ///
`home_unequal_DiDmaxrev',`equal_home_DiDmaxrev',`equal_unequal_DiDmaxrev', ///
`equal_Sequal_DiDmaxrev',`Sequal_home_DiDmaxrev',`home_Sunequal_DiDmaxrev',`Sunequal_unequal_DiDmaxrev', ///
`N_wagereg',`R2_wagereg',`N_pred',`N_probit')

matrix colnames `bvec'="equal_home_KS1s" "home_unequal_KS1s" "equal_unequal_KS1s" ///
"equal_Sequal_KS1s" "Sequal_home_KS1s" "home_Sunequal_KS1s" "Sunequal_unequal_KS1s" ///
"equal_home_KS1srev" "home_unequal_KS1srev" "equal_unequal_KS1srev" ///
"equal_Sequal_KS1srev" "Sequal_home_KS1srev" "home_Sunequal_KS1srev" "Sunequal_unequal_KS1srev" ///
"equal_home_pval1s" "home_unequal_pval1s" "equal_unequal_pval1s" ///
"equal_Sequal_pval1s" "Sequal_home_pval1s" "home_Sunequal_pval1s" "Sunequal_unequal_pval1s" ///
"equal_home_pval1srev" "home_unequal_pval1srev" "equal_unequal_pval1srev" ///
"equal_Sequal_pval1srev" "Sequal_home_pval1srev" "home_Sunequal_pval1srev" "Sunequal_unequal_pval1srev" ///
"home_unequal_DiDmax" "equal_home_DiDmax" "equal_unequal_DiDmax" ///
"equal_Sequal_DiDmax" "Sequal_home_DiDmax" "home_Sunequal_DiDmax" "Sunequal_unequal_DiDmax" ///
"home_unequal_DiDmaxrev" "equal_home_DiDmaxrev" "equal_unequal_DiDmaxrev" ///
"equal_Sequal_DiDmaxrev" "Sequal_home_DiDmaxrev" "home_Sunequal_DiDmaxrev" "Sunequal_unequal_DiDmaxrev" ///
"N_wagereg" "R2_wagereg" "N_pred" "N_probit"

	*****************************
	* PREPARE -ERETURN-
	*****************************

* e(sample) = observations with a prediction; a tempvar avoids failing when
* the data already contain a variable named -sample-
tempvar esamp
qui gen byte `esamp'=(`gen'!=.)
ereturn post `bvec', esample(`esamp')

if `sleep'>0 {
	sleep `sleep'
}

end

	*
